From cde62b6cc81b19067a739d6f59faf0b1cf49cace Mon Sep 17 00:00:00 2001 From: "kaf24@scramble.cl.cam.ac.uk" Date: Thu, 13 May 2004 15:09:41 +0000 Subject: [PATCH] bitkeeper revision 1.891.1.16 (40a38fb5auV2wZtbB0nLg2hIQ75DjA) Optimisations for new network IO model. Much better receive performance. --- xen/arch/i386/entry.S | 15 ++-- xen/common/dom_mem_ops.c | 64 ++++++---------- xen/common/domain.c | 5 +- xen/common/memory.c | 27 +++++++ xen/include/hypervisor-ifs/hypervisor-if.h | 1 + .../arch/xen/drivers/netif/backend/main.c | 74 ++++++++++--------- .../arch/xen/drivers/netif/frontend/main.c | 36 +++------ .../include/asm-xen/hypervisor.h | 14 ++++ 8 files changed, 129 insertions(+), 107 deletions(-) diff --git a/xen/arch/i386/entry.S b/xen/arch/i386/entry.S index b522f4f754..4e9163a75d 100644 --- a/xen/arch/i386/entry.S +++ b/xen/arch/i386/entry.S @@ -183,20 +183,22 @@ do_multicall: multicall_loop: pushl %ecx multicall_fault1: - pushl 20(%ebx) + pushl 20(%ebx) # args[4] multicall_fault2: - pushl 16(%ebx) + pushl 16(%ebx) # args[3] multicall_fault3: - pushl 12(%ebx) + pushl 12(%ebx) # args[2] multicall_fault4: - pushl 8(%ebx) + pushl 8(%ebx) # args[1] multicall_fault5: - pushl 4(%ebx) + pushl 4(%ebx) # args[0] multicall_fault6: - movl (%ebx),%eax + movl (%ebx),%eax # op andl $255,%eax call *SYMBOL_NAME(hypervisor_call_table)(,%eax,4) multicall_return_from_call: +multicall_fault7: + movl %eax,24(%ebx) # args[5] == result addl $20,%esp popl %ecx addl $(ARGS_PER_MULTICALL_ENTRY*4),%ebx @@ -745,6 +747,7 @@ ENTRY(hypervisor_call_table) .long SYMBOL_NAME(do_xen_version) .long SYMBOL_NAME(do_console_io) .long SYMBOL_NAME(do_physdev_op) + .long SYMBOL_NAME(do_update_va_mapping_otherdomain) /* 25 */ .rept NR_syscalls-((.-hypervisor_call_table)/4) .long SYMBOL_NAME(do_ni_syscall) .endr diff --git a/xen/common/dom_mem_ops.c b/xen/common/dom_mem_ops.c index 79d0bb1df1..fcc4977bee 100644 --- a/xen/common/dom_mem_ops.c +++ b/xen/common/dom_mem_ops.c @@ -18,24 +18,22 @@ static long alloc_dom_mem(struct task_struct *p, reservation_increase_t op) { - struct pfn_info *page; - unsigned long mpfn; /* machine frame number of current page */ - void *va; /* Xen-usable mapping of current page */ - unsigned long i; + struct pfn_info *page; + unsigned long i; - for ( i = 0; i < op.size; i++ ) + /* Leave some slack pages; e.g., for the network. */ + if ( unlikely(free_pfns < (op.size + (SLACK_DOMAIN_MEM_KILOBYTES >> + (PAGE_SHIFT-10)))) ) { - /* Leave some slack pages; e.g., for the network. */ - if ( unlikely(free_pfns < (SLACK_DOMAIN_MEM_KILOBYTES >> - (PAGE_SHIFT-10))) ) - { - DPRINTK("Not enough slack: %u %u\n", - free_pfns, - SLACK_DOMAIN_MEM_KILOBYTES >> (PAGE_SHIFT-10)); - break; - } + DPRINTK("Not enough slack: %u %u\n", + free_pfns, + SLACK_DOMAIN_MEM_KILOBYTES >> (PAGE_SHIFT-10)); + return 0; + } - /* NB. 'alloc_domain_page' does limit checking on pages per domain. */ + for ( i = 0; i < op.size; i++ ) + { + /* NB. 'alloc_domain_page' does limit-checking on pages per domain. */ if ( unlikely((page = alloc_domain_page(p)) == NULL) ) { DPRINTK("Could not allocate a frame\n"); @@ -43,14 +41,8 @@ static long alloc_dom_mem(struct task_struct *p, reservation_increase_t op) } /* Inform the domain of the new page's machine address. */ - mpfn = (unsigned long)(page - frame_table); - copy_to_user(op.pages, &mpfn, sizeof(mpfn)); - op.pages++; - - /* Zero out the page to prevent information leakage. */ - va = map_domain_mem(mpfn << PAGE_SHIFT); - memset(va, 0, PAGE_SIZE); - unmap_domain_mem(va); + if ( unlikely(put_user(page_to_pfn(page), &op.pages[i]) != 0) ) + break; } return i; @@ -58,22 +50,21 @@ static long alloc_dom_mem(struct task_struct *p, reservation_increase_t op) static long free_dom_mem(struct task_struct *p, reservation_decrease_t op) { - struct pfn_info *page; - unsigned long mpfn; /* machine frame number of current page */ - unsigned long i; - long rc = 0; - int need_flush = 0; + struct pfn_info *page; + unsigned long i, mpfn; + long rc = 0; for ( i = 0; i < op.size; i++ ) { - copy_from_user(&mpfn, op.pages, sizeof(mpfn)); - op.pages++; - if ( mpfn >= max_page ) + if ( unlikely(get_user(mpfn, &op.pages[i]) != 0) ) + break; + + if ( unlikely(mpfn >= max_page) ) { DPRINTK("Domain %llu page number out of range (%08lx>=%08lx)\n", p->domain, mpfn, max_page); rc = -EINVAL; - goto out; + break; } page = &frame_table[mpfn]; @@ -81,7 +72,7 @@ static long free_dom_mem(struct task_struct *p, reservation_decrease_t op) { DPRINTK("Bad page free for domain %llu\n", p->domain); rc = -EINVAL; - goto out; + break; } if ( test_and_clear_bit(_PGC_guest_pinned, &page->count_and_flags) ) @@ -93,13 +84,6 @@ static long free_dom_mem(struct task_struct *p, reservation_decrease_t op) put_page(page); } - out: - if ( need_flush ) - { - __flush_tlb(); - perfc_incr(need_flush_tlb_flush); - } - return rc ? rc : op.size; } diff --git a/xen/common/domain.c b/xen/common/domain.c index f19c05eaf2..c40c052425 100644 --- a/xen/common/domain.c +++ b/xen/common/domain.c @@ -324,8 +324,7 @@ struct pfn_info *alloc_domain_page(struct task_struct *p) page->type_and_flags = 0; if ( p != NULL ) { - if ( unlikely(in_irq()) ) - BUG(); + ASSERT(!in_irq()); wmb(); /* Domain pointer must be visible before updating refcnt. */ spin_lock(&p->page_list_lock); if ( unlikely(p->tot_pages >= p->max_pages) ) @@ -369,7 +368,7 @@ void free_domain_page(struct pfn_info *page) if ( !(page->count_and_flags & PGC_zombie) ) { page->tlbflush_timestamp = tlbflush_clock; - if (p) + if ( likely(p != NULL) ) { page->u.cpu_mask = 1 << p->processor; spin_lock(&p->page_list_lock); diff --git a/xen/common/memory.c b/xen/common/memory.c index 3f8ee288ee..87ddf4c917 100644 --- a/xen/common/memory.c +++ b/xen/common/memory.c @@ -1209,6 +1209,33 @@ int do_update_va_mapping(unsigned long page_nr, return err; } +int do_update_va_mapping_otherdomain(unsigned long page_nr, + unsigned long val, + unsigned long flags, + domid_t domid) +{ + unsigned int cpu = smp_processor_id(); + struct task_struct *p; + int rc; + + if ( unlikely(!IS_PRIV(current)) ) + return -EPERM; + + percpu_info[cpu].gps = p = find_domain_by_id(domid); + if ( unlikely(p == NULL) ) + { + MEM_LOG("Unknown domain '%llu'", domid); + return -ESRCH; + } + + rc = do_update_va_mapping(page_nr, val, flags); + + put_task_struct(p); + percpu_info[cpu].gps = NULL; + + return rc; +} + #ifndef NDEBUG /* diff --git a/xen/include/hypervisor-ifs/hypervisor-if.h b/xen/include/hypervisor-ifs/hypervisor-if.h index 5cde5d46e4..efe34e2184 100644 --- a/xen/include/hypervisor-ifs/hypervisor-if.h +++ b/xen/include/hypervisor-ifs/hypervisor-if.h @@ -39,6 +39,7 @@ #define __HYPERVISOR_xen_version 22 #define __HYPERVISOR_console_io 23 #define __HYPERVISOR_physdev_op 24 +#define __HYPERVISOR_update_va_mapping_otherdomain 25 /* * MULTICALLS diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/main.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/main.c index 8178608959..7a32149404 100644 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/main.c +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/main.c @@ -100,8 +100,8 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev) netif_t *netif = (netif_t *)dev->priv; s8 status = NETIF_RSP_OKAY; u16 size=0, id; - mmu_update_t mmu[6]; - pgd_t *pgd; pmd_t *pmd; pte_t *pte; + mmu_update_t mmu[4]; + multicall_entry_t mcl[2]; unsigned long vdata, mdata=0, new_mfn; /* Drop the packet if the target domain has no receive buffers. */ @@ -148,34 +148,37 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev) new_mfn = get_new_mfn(); - pgd = pgd_offset_k( (vdata & PAGE_MASK)); - pmd = pmd_offset(pgd, (vdata & PAGE_MASK)); - pte = pte_offset(pmd, (vdata & PAGE_MASK)); + mmu[0].ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE; + mmu[0].val = __pa(vdata) >> PAGE_SHIFT; - mmu[0].val = (unsigned long)(netif->domid<<16) & ~0xFFFFUL; - mmu[0].ptr = (unsigned long)(netif->domid<< 0) & ~0xFFFFUL; - mmu[1].val = (unsigned long)(netif->domid>>16) & ~0xFFFFUL; - mmu[1].ptr = (unsigned long)(netif->domid>>32) & ~0xFFFFUL; - mmu[0].ptr |= MMU_EXTENDED_COMMAND; - mmu[0].val |= MMUEXT_SET_SUBJECTDOM_L; + mmu[1].val = (unsigned long)(netif->domid<<16) & ~0xFFFFUL; + mmu[1].ptr = (unsigned long)(netif->domid<< 0) & ~0xFFFFUL; + mmu[2].val = (unsigned long)(netif->domid>>16) & ~0xFFFFUL; + mmu[2].ptr = (unsigned long)(netif->domid>>32) & ~0xFFFFUL; mmu[1].ptr |= MMU_EXTENDED_COMMAND; - mmu[1].val |= MMUEXT_SET_SUBJECTDOM_H; - - mmu[2].ptr = (mdata & PAGE_MASK) | MMU_EXTENDED_COMMAND; - mmu[2].val = MMUEXT_REASSIGN_PAGE; - - mmu[3].ptr = MMU_EXTENDED_COMMAND; - mmu[3].val = MMUEXT_RESET_SUBJECTDOM; - - mmu[4].ptr = virt_to_machine(pte); - mmu[4].val = (new_mfn << PAGE_SHIFT) | __PAGE_KERNEL; - - mmu[5].ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE; - mmu[5].val = __pa(vdata) >> PAGE_SHIFT; - - if ( unlikely(HYPERVISOR_mmu_update(mmu, 6) < 0) ) + mmu[1].val |= MMUEXT_SET_SUBJECTDOM_L; + mmu[2].ptr |= MMU_EXTENDED_COMMAND; + mmu[2].val |= MMUEXT_SET_SUBJECTDOM_H; + + mmu[3].ptr = (mdata & PAGE_MASK) | MMU_EXTENDED_COMMAND; + mmu[3].val = MMUEXT_REASSIGN_PAGE; + + mcl[0].op = __HYPERVISOR_mmu_update; + mcl[0].args[0] = (unsigned long)mmu; + mcl[0].args[1] = 4; + mcl[1].op = __HYPERVISOR_update_va_mapping; + mcl[1].args[0] = vdata >> PAGE_SHIFT; + mcl[1].args[1] = (new_mfn << PAGE_SHIFT) | __PAGE_KERNEL; + mcl[1].args[2] = UVMF_INVLPG; + + (void)HYPERVISOR_multicall(mcl, 2); + if ( mcl[0].args[5] != 0 ) { DPRINTK("Failed MMU update transferring to DOM%llu\n", netif->domid); + (void)HYPERVISOR_update_va_mapping( + vdata >> PAGE_SHIFT, + (pte_t) { (mdata & PAGE_MASK) | __PAGE_KERNEL }, + UVMF_INVLPG); dealloc_mfn(new_mfn); status = NETIF_RSP_ERROR; goto out; @@ -183,6 +186,10 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev) phys_to_machine_mapping[__pa(vdata) >> PAGE_SHIFT] = new_mfn; + atomic_set(&(skb_shinfo(skb)->dataref), 1); + skb_shinfo(skb)->nr_frags = 0; + skb_shinfo(skb)->frag_list = NULL; + netif->stats.rx_bytes += size; netif->stats.rx_packets++; @@ -261,7 +268,6 @@ static void net_tx_action(unsigned long unused) netif_tx_request_t txreq; u16 pending_idx; NETIF_RING_IDX i; - pgprot_t prot = __pgprot(_PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED); struct page *page; while ( (NR_PENDING_REQS < MAX_PENDING_REQS) && @@ -334,10 +340,10 @@ static void net_tx_action(unsigned long unused) pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)]; - if ( direct_remap_area_pages(&init_mm, - MMAP_VADDR(pending_idx), - txreq.addr & PAGE_MASK, - PAGE_SIZE, prot, netif->domid) != 0 ) + if ( HYPERVISOR_update_va_mapping_otherdomain( + MMAP_VADDR(pending_idx) >> PAGE_SHIFT, + (pte_t) { (txreq.addr & PAGE_MASK) | __PAGE_KERNEL }, + 0, netif->domid) != 0 ) { DPRINTK("Bad page frame\n"); make_tx_response(netif, txreq.id, NETIF_RSP_ERROR); @@ -352,7 +358,8 @@ static void net_tx_action(unsigned long unused) DPRINTK("Can't allocate a skb in start_xmit.\n"); make_tx_response(netif, txreq.id, NETIF_RSP_ERROR); netif_put(netif); - vmfree_area_pages(MMAP_VADDR(pending_idx), PAGE_SIZE); + HYPERVISOR_update_va_mapping(MMAP_VADDR(pending_idx) >> PAGE_SHIFT, + (pte_t) { 0 }, UVMF_INVLPG); break; } @@ -401,7 +408,8 @@ static void netif_page_release(struct page *page) netif = pending_netif[pending_idx]; - vmfree_area_pages(MMAP_VADDR(pending_idx), PAGE_SIZE); + HYPERVISOR_update_va_mapping(MMAP_VADDR(pending_idx) >> PAGE_SHIFT, + (pte_t) { 0 }, UVMF_INVLPG); spin_lock(&netif->tx_lock); make_tx_response(netif, pending_id[pending_idx], NETIF_RSP_OKAY); diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/frontend/main.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/frontend/main.c index cc5ac31e82..ff73952e88 100644 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/frontend/main.c +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/frontend/main.c @@ -171,16 +171,6 @@ static void network_tx_buf_gc(struct net_device *dev) } -static inline pte_t *get_ppte(void *addr) -{ - pgd_t *pgd; pmd_t *pmd; pte_t *pte; - pgd = pgd_offset_k( (unsigned long)addr); - pmd = pmd_offset(pgd, (unsigned long)addr); - pte = pte_offset(pmd, (unsigned long)addr); - return pte; -} - - static void network_alloc_rx_buffers(struct net_device *dev) { unsigned short id; @@ -190,7 +180,6 @@ static void network_alloc_rx_buffers(struct net_device *dev) dom_mem_op_t op; unsigned long pfn_array[NETIF_RX_RING_SIZE]; int ret, nr_pfns = 0; - pte_t *pte; /* Make sure the batch is large enough to be worthwhile (1/2 ring). */ if ( unlikely((i - np->rx_resp_cons) > (NETIF_RX_RING_SIZE/2)) || @@ -212,9 +201,9 @@ static void network_alloc_rx_buffers(struct net_device *dev) np->rx->ring[MASK_NET_RX_IDX(i)].req.id = id; - pte = get_ppte(skb->head); - pfn_array[nr_pfns++] = pte->pte_low >> PAGE_SHIFT; - queue_l1_entry_update(pte, 0); + pfn_array[nr_pfns++] = virt_to_machine(skb->head) >> PAGE_SHIFT; + HYPERVISOR_update_va_mapping((unsigned long)skb->head >> PAGE_SHIFT, + (pte_t) { 0 }, UVMF_INVLPG); } while ( (++i - np->rx_resp_cons) != NETIF_RX_RING_SIZE ); @@ -309,8 +298,7 @@ static void netif_int(int irq, void *dev_id, struct pt_regs *ptregs) struct sk_buff *skb; netif_rx_response_t *rx; NETIF_RING_IDX i; - mmu_update_t mmu[2]; - pte_t *pte; + mmu_update_t mmu; spin_lock_irqsave(&np->tx_lock, flags); network_tx_buf_gc(dev); @@ -334,13 +322,14 @@ static void netif_int(int irq, void *dev_id, struct pt_regs *ptregs) } /* Remap the page. */ - pte = get_ppte(skb->head); - mmu[0].ptr = virt_to_machine(pte); - mmu[0].val = (rx->addr & PAGE_MASK) | __PAGE_KERNEL; - mmu[1].ptr = (rx->addr & PAGE_MASK) | MMU_MACHPHYS_UPDATE; - mmu[1].val = __pa(skb->head) >> PAGE_SHIFT; - if ( HYPERVISOR_mmu_update(mmu, 2) != 0 ) + mmu.ptr = (rx->addr & PAGE_MASK) | MMU_MACHPHYS_UPDATE; + mmu.val = __pa(skb->head) >> PAGE_SHIFT; + if ( HYPERVISOR_mmu_update(&mmu, 1) != 0 ) BUG(); + HYPERVISOR_update_va_mapping((unsigned long)skb->head >> PAGE_SHIFT, + (pte_t) { (rx->addr & PAGE_MASK) | + __PAGE_KERNEL }, + 0); phys_to_machine_mapping[__pa(skb->head) >> PAGE_SHIFT] = rx->addr >> PAGE_SHIFT; @@ -352,9 +341,6 @@ static void netif_int(int irq, void *dev_id, struct pt_regs *ptregs) atomic_set(&(skb_shinfo(skb)->dataref), 1); skb_shinfo(skb)->nr_frags = 0; skb_shinfo(skb)->frag_list = NULL; - - phys_to_machine_mapping[virt_to_phys(skb->head) >> PAGE_SHIFT] = - (*(unsigned long *)get_ppte(skb->head)) >> PAGE_SHIFT; skb->data = skb->tail = skb->head + (rx->addr & ~PAGE_MASK); skb_put(skb, rx->status); diff --git a/xenolinux-2.4.26-sparse/include/asm-xen/hypervisor.h b/xenolinux-2.4.26-sparse/include/asm-xen/hypervisor.h index c355ec5381..cb841f3fb7 100644 --- a/xenolinux-2.4.26-sparse/include/asm-xen/hypervisor.h +++ b/xenolinux-2.4.26-sparse/include/asm-xen/hypervisor.h @@ -459,4 +459,18 @@ static inline int HYPERVISOR_physdev_op(void *physdev_op) return ret; } +static inline int HYPERVISOR_update_va_mapping_otherdomain( + unsigned long page_nr, pte_t new_val, unsigned long flags, domid_t domid) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_update_va_mapping_otherdomain), + "b" (page_nr), "c" ((new_val).pte_low), "d" (flags), + "S" ((unsigned long)domid), "D" ((unsigned long)(domid>>32)) : + "memory" ); + + return ret; +} + #endif /* __HYPERVISOR_H__ */ -- 2.30.2